---
title: "Dysregulation of Cerebral Hemoglobin Content in Malawian Children with Malaria"
author: "Rachel Smith"
date: "`r Sys.Date()`"
output: html_document
---

<style type="text/css">
  body{
  font-family: Arial;
  font-size: 12pt;
}
</style>

```{r setup, include = FALSE}

knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE, comment = "", fig.height = 6, fig.width = 10)

library(tidyverse)
library(DescTools)
library(janitor)
library(FSA)
library(factoextra)
library(ggrepel)
library(ggpubr)
library(tidymodels)
library(vip)
library(scales)
library(reshape)
library(extrafont)
library(grDevices)

load("glm_model.Rdata")
load("results.Rdata")
load("roc_curves.Rdata")
load("dfa_example.Rdata")
load("dfa_results.Rdata")
load("all_dfa_results.Rdata")

font_import()

```

```{r data}

# read in data

  df_hbox <- read.csv("Data/master_datatable.csv") %>% 
    as_tibble() %>% 
    dplyr::select(-1) %>% 
    mutate(Status = replace(Status, Status == "HV", "HC")) %>% 
    mutate(Status = factor(Status, levels = c("HC", "UM", "CM")))

# pivot long

  df_hbox_long <- df_hbox %>% pivot_longer(cols = 7:ncol(.), names_to = "variable", values_to = "value")

```

### Table 1: Summary of clinical data by patient group

```{r table1}

# number of subjects 
df_status <- df_hbox %>% 
  count(Status) %>% 
  pivot_wider(1:2, names_from = "Status", values_from = "n") %>% 
  mutate(variable = "Number of subjects") %>% 
  dplyr::select(variable, everything()) %>% 
  mutate_if(is.numeric, as.character)

# Sex
df_sex <- df_hbox %>% 
  group_by(Status) %>% 
  count(Sex) %>% 
  dplyr::filter(Sex != "") %>% 
  pivot_wider(names_from = "Sex", values_from = "n") %>% 
  mutate(perc_fem = round(Female/(Female + Male)*100, 2)) %>% 
  
  pivot_wider(c(1,4), names_from = "Status", values_from = "perc_fem") %>% 
  mutate(variable = "Sex (% female)") %>% 
  dplyr::select(variable, everything()) %>% 
  mutate_if(is.numeric, as.character)

# other numeric variables

df_clinical_res <- df_hbox %>% 
  
  # dplyr::filter(Subject.ID..NIAID. != "TM1018") %>% 
  
  group_by(Status) %>% 
  summarise(`Age (years)` = median(Age, na.rm = TRUE), age_q1 = quantile(Age, 0.25, na.rm = TRUE), age_q3 = quantile(Age, 0.75, na.rm = TRUE),
            
            `Temperature (°C)` = median(Temperature, na.rm = TRUE), temp_q1 = quantile(Temperature, 0.25, na.rm = TRUE), temp_q3 = quantile(Temperature, 0.75, na.rm = TRUE),
            `Heart rate` = median(HR, na.rm = TRUE), hr_q1 = quantile(HR, 0.25, na.rm = TRUE), hr_q3 = quantile(HR, 0.75, na.rm = TRUE),
            `Systolic BP` = median(BP.Systolic, na.rm = TRUE), sys_q1 = quantile(BP.Systolic, 0.25, na.rm = TRUE), sys_q3 = quantile(BP.Systolic, 0.75, na.rm = TRUE),
            `Diastolic BP` = median(BP.Diastolic, na.rm = TRUE), dia_q1 = quantile(BP.Diastolic, 0.25, na.rm = TRUE), dia_q3 = quantile(BP.Diastolic, 0.75, na.rm = TRUE),
            `Respiration rate` = median(RR, na.rm = TRUE), rr_q1 = quantile(RR, 0.25, na.rm = TRUE), rr_q3 = quantile(RR, 0.75, na.rm = TRUE),
            `O~2~ saturation` = median(O2.Sat, na.rm = TRUE), o2_q1 = quantile(O2.Sat, 0.25, na.rm = TRUE), o2_q3 = quantile(O2.Sat, 0.75, na.rm = TRUE),
            
            `Hematocrit (%)` = median(Hematocrit, na.rm = TRUE), hem_q1 = quantile(Hematocrit, 0.25, na.rm = TRUE), hem_q3 = quantile(Hematocrit, 0.75, na.rm = TRUE),
            `Lactate (mmol/L)` = median(Lactate, na.rm = TRUE), lac_q1 = quantile(Lactate, 0.25, na.rm = TRUE), lac_q3 = quantile(Lactate, 0.75, na.rm = TRUE),
            `Arginine (μmol/L)` = median(Arginine.umol.L, na.rm = TRUE), arg_q1 = quantile(Arginine.umol.L, 0.25, na.rm = TRUE), arg_q3 = quantile(Arginine.umol.L, 0.75, na.rm = TRUE)) %>% 
  mutate_if(is.numeric, round, 1) %>% 
  
  mutate(`Age (years)` = paste0(`Age (years)`, " (", age_q1, ", ", age_q3, ")"),
         
         `Temperature (°C)` = paste0(`Temperature (°C)`, " (", temp_q1, ", ", temp_q3, ")"),
         `Heart rate` = paste0(`Heart rate`, " (", hr_q1, ", ", hr_q3, ")"),
         `Systolic BP` = paste0(`Systolic BP`, " (", sys_q1, ", ", sys_q3, ")"),
         `Diastolic BP` = paste0(`Diastolic BP`, " (", dia_q1, ", ", dia_q3, ")"),
         `Respiration rate` = paste0(`Respiration rate`, " (", rr_q1, ", ", rr_q3, ")"),
         `O~2~ saturation` = paste0(`O~2~ saturation`, " (", o2_q1, ", ", o2_q3, ")"),

         `Hematocrit (%)` = paste0(`Hematocrit (%)`, " (", hem_q1, ", ", hem_q3, ")"),
         `Lactate (mmol/L)` = paste0(`Lactate (mmol/L)`, " (", lac_q1, ", ", lac_q3, ")"),
         `Arginine (μmol/L)` = paste0(`Arginine (μmol/L)`, " (", arg_q1, ", ", arg_q3, ")")) %>% 
  dplyr::select(-c(age_q1, age_q3, 
                   temp_q1, temp_q3, hr_q1, hr_q3, sys_q1, sys_q3, dia_q1, dia_q3, rr_q1, rr_q3, o2_q1, o2_q3, 
                   hem_q1, hem_q3, lac_q1, lac_q3, arg_q1, arg_q3)) %>% 

  unique() %>% 
  t() %>% 
  row_to_names(row_number = 1) %>% 
  as_tibble(rownames = "variable") %>% 
  add_row(df_status, .before = 1) %>% 
  add_row(df_sex, .before = 2) %>% 
  
  dplyr::rename(" " = "variable",
                "Healthy children" = "HC",
                "Uncomplicated malaria" = "UM",
                "Cerebral malaria" = "CM")

  # remove lactate data for HC
  df_clinical_res[11,]$`Healthy children` <- "--"
  
  # export to CSV
  # write.csv(df_clinical_res, file = "Outputs/table_1.csv")
  
  # create kable df
  df_clinical_res %>% knitr::kable(align = rep('c', 4))

```
**Table 1.** A summary of subject information and hematological data. Group patient data presented as median (quartile 1, quartile 3). Note: Sex not recorded for 3 CM patients; Lactate data removed for healthy volunteers due to inconsistent data.

### Figure 1: Cerebral tissue hemoglobin concentration and oxygen saturation 

```{r fig1}


# extract variables of interest
voi <- df_results %>% colnames() %>% .[3:8] 

# select from the long df
df_hbox_voi_long <- df_hbox_long %>% 
  dplyr::filter(variable %in% voi) %>% 
  mutate(variable = factor(variable, levels = c("avg_Hb_o2sat", "avg_Hb_conc",
                                                "Hb_o2sat_overall_alpha", "Hb_conc_overall_alpha",
                                                "Hb_o2sat_second_alpha", "Hb_conc_second_alpha")))
# create color vector
my_col <- c("CM" = "#00BFC4", "HC" = "#7CAE00", "UM" = "#F8766D")

# plot
df_hbox_voi_long %>% dplyr::filter(!(grepl("alpha", variable))) %>%
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1, size = 2) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +
  facet_wrap(~variable, scales = "free_y") +
  
  scale_color_manual(values = my_col) +
  theme_bw() +
  theme(legend.position = "none")  +
  
  theme(text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

```{r fig1.pvals, eval = FALSE}

# hemoglobin concentration
kruskal.test(avg_Hb_conc ~ Status, data = df_hbox)
DescTools::DunnTest(avg_Hb_conc ~ Status, data = df_hbox, method = "BH")

# hemoglobin oxygen saturation
kruskal.test(avg_Hb_o2sat ~ Status, data = df_hbox)

```

**Figure 1.** Shown are the variables measured by Oxiplex, plotted by patient status. Cerebral tissue hemoglobin concentration levels were different between cerebral malaria patients and the other two groups, but did not differ between uncomplicated malaria patients and healthy volunteers (Kruskal-Wallis adjusted p-value = 5.0x10^-7^; Dunn Test post-hoc adjusted p-values: CM vs HC = 3.2x10^-6^, CM vs UM = 2.8x10^-4^, HC vs UM = 0.87). Levels of cerebral hemoglobin O~2~ saturation did not differ among groups (Kruskal-Wallis adjusted p-value = 0.47).

### Figure 2: Detrended fluctuation analysis results

```{r fig2}

df_hbox_voi_long %>% dplyr::filter(grepl("second_alpha", variable)) %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1, size = 2) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +
  
  geom_hline(yintercept = 0.45, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 0.55, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 1.0, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 1.5, alpha = 0.4, lty = 2) +
  
  facet_wrap(~variable, scales = "free_y") +
  
  ylim(0, 1.5) +
  scale_color_manual(values = my_col) +
  theme_bw() +
  
  theme(text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15),
        legend.position = "none")

```

```{r fig2.pvals, eval = FALSE}

# hemoglobin concentration alpha
kruskal.test(Hb_conc_second_alpha ~ Status, data = df_hbox)
DescTools::DunnTest(Hb_conc_second_alpha ~ Status, data = df_hbox, method = "BH")

# hemoglobin oxygen saturation alpha
kruskal.test(Hb_o2sat_second_alpha ~ Status, data = df_hbox)
DescTools::DunnTest(Hb_o2sat_second_alpha ~ Status, data = df_hbox, method = "BH")

```

**Figure 2.** Shown are the alpha values associated with the hemoglobin concentration and oxygen saturation signals measured by Oxiplex. Differences among cerebral tissue hemoglobin concentration alpha values were significant by Kruskal-Wallis (adjusted p-value = 7.16x10^-4^), and differences between uncomplicated malaria patients and the other two patient groups were significant by post-hoc Dunn Test (adjusted p-values: CM vs HV = 0.10, CM vs UM = 4.7x10^-4^, HV vs UM = 0.03). Cerebral hemoglobin O~2~ saturation alpha values differed between cerebral malaria patients and the other two patient groups (Kruskal-Wallis adjusted p-value = 4.2x10^-5^; Dunn Test post-hoc adjusted p-values: CM vs HV = 7.5x10^-4^, CM vs UM = 3.5x10^-4^, HV vs UM = 0.32). 

### Figure 3: Logistic regression model on CM vs UM

#### A. PCA
```{r fig3a, fig.height = 8}

# include variables of interest, remove healthy children from analysis

df_hbox_voi <- df_hbox %>% 
  dplyr::select(c("subject_id", "Status", "Hematocrit", "Lactate", "avg_Hb_conc", "Hb_conc_second_alpha", "avg_Hb_o2sat", "Hb_o2sat_second_alpha"))

# impute missing data with median by group
  f_impute <- function(x, na.rm = TRUE) (replace(x, is.na(x), median(x, na.rm = na.rm)))
  
  df_hbox_impute <- df_hbox_voi %>% 
    group_by(Status) %>% 
    mutate_at(3:ncol(.), f_impute) %>% 
    ungroup() 

# remove healthy volunteers for logistic regression analysis
df_hbox_log_reg <- df_hbox_impute %>% dplyr::filter(Status != "HC")

# convert tibble to df
df_hbox_voi_df <- df_hbox_log_reg %>% 
  dplyr::select(-c(subject_id, Status)) %>% 
  as.data.frame() 

# add rownames
rownames(df_hbox_voi_df) <- df_hbox_log_reg %>% .$subject_id

# compute PCA
my_pca <- prcomp(df_hbox_voi_df, scale = TRUE)

# define groups
groups <- as.factor(df_hbox_log_reg$Status)

# Eigenvalues
pca_eig_val <- get_eigenvalue(my_pca)

# two data frames for plotting
df_vars <- my_pca$rotation %>% as_tibble() %>% mutate(variable = rownames(my_pca$rotation))
df_pnts <- my_pca$x %>% as_tibble() %>% mutate(group = groups) 

# plot

  # plot patients
  ggplot() +
  geom_point(data = df_pnts, mapping = aes(x = PC1, y = PC2, color = group), size = 3) +
  stat_conf_ellipse(data = df_pnts, mapping = aes(x = PC1, y = PC2, color = group, fill = group),
                    level = 0.95, npoint = 100, bary = TRUE, alpha = 0.1, geom = "polygon") +
  stat_mean(data = df_pnts, mapping = aes(x = PC1, y = PC2, color = group, shape = group),
            na.rm = FALSE) +
  
  # plot variables
  # geom_point(data = df_vars, aes(x = PC1*5, y = PC2*5)) +
  # geom_text_repel(data = df_vars, aes(x = PC1*5, y = PC2*5, label = variable, size = 10), show.legend = FALSE) +
  geom_segment(data = df_vars, aes(x = 0, y = 0, xend = PC1*5, yend = PC2*5), arrow = arrow()) +

  # draw lines
  geom_vline(xintercept = 0, lty = 2, color = "black") +
  geom_hline(yintercept = 0, lty = 2, color = "black") +
  
  # design
  xlab(paste0("PC1: ", round(pca_eig_val$variance.percent[1], 2), "% of variance")) +
  ylab(paste0("PC2: ", round(pca_eig_val$variance.percent[2], 2), "% of variance")) +

  theme_bw() +
  theme(text = element_text(family = "Arial"),
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20))

```

#### B. Confusion matrix

```{r fig3b_prep, eval = FALSE}

library(themis)

# select [HbT], [HbT] alpha, and hematocrit
  df_hbox_model <- df_hbox_log_reg %>% 
    dplyr::select(c(Status, Hematocrit, avg_Hb_conc, Hb_conc_second_alpha)) %>% 
    mutate(`1/10_Hb_conc_alpha` = Hb_conc_second_alpha*0.1, avg_Hb_conc_nm = avg_Hb_conc/1000) %>% 
    dplyr::select(-c(Hb_conc_second_alpha, avg_Hb_conc)) %>% 
    mutate(Status = as.factor(ifelse(Status == "CM", 1, 0))) 
  
  df_hbox_model <- df_hbox_log_reg %>% 
    dplyr::select(c(Status, Hematocrit, avg_Hb_conc, Hb_conc_second_alpha)) %>% 
    mutate(`1.10_Hb_conc_alpha` = Hb_conc_second_alpha*0.1) %>% 
    mutate(`1.10_Hb_conc_alpha_z` = (Hb_conc_second_alpha - mean(Hb_conc_second_alpha)) / sd(Hb_conc_second_alpha), 
           Hb_conc_z = (avg_Hb_conc - mean(avg_Hb_conc)) / sd(avg_Hb_conc),
           Hematocrit_z = (Hematocrit - mean(Hematocrit)) / sd(Hematocrit)) %>% 
    dplyr::select(-c(Hb_conc_second_alpha, `1.10_Hb_conc_alpha`, avg_Hb_conc, Hematocrit)) %>% 
    mutate(Status = as.factor(ifelse(Status == "CM", 1, 0))) 

# resample
set.seed(234)
hbox_boot <- bootstraps(df_hbox_model, strata = Status)#, times = 1000)

# recipe
hbox_rec <- recipe(Status ~ ., data = df_hbox_model) %>% 
  step_BoxCox(all_predictors()) %>% 
  step_nzv(all_predictors()) %>% 
  step_smote(Status) # upsample to deal with unbalanced sample sizes

# create workflow
hbox_wf <- workflow() %>% add_recipe(hbox_rec)

# logistic regression model specifications
glm_spec <- logistic_reg() %>% set_engine("glm")

# create model on 1000 different resamples
glm_res <- hbox_wf %>% 
  add_model(glm_spec) %>%
  fit_resamples(
    resamples = hbox_boot,
    metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
    control = tune::control_resamples(save_pred = TRUE, verbose = TRUE)
  )

# select best model to be used for evaluation
glm_best <- glm_res %>% select_best("roc_auc")

# finalize model
hbox_final <- hbox_wf %>% 
  add_model(glm_spec) %>% 
  finalize_workflow(glm_best) %>%
  fit(df_hbox_model) %>%
  pull_workflow_fit()

# save objects so model doesn't rerun every time we knit
save(glm_res, hbox_final, file = "glm_model.Rdata")

```   

```{r fig3b_plot, fig.height = 8}

final_res_conf_mat <- glm_res %>% 
  conf_mat_resampled() %>% 
  mutate(Prediction = ifelse(Prediction == 1, "CM", "UM")) %>% 
  mutate(Truth = ifelse(Truth == 1, "CM", "UM"))

total_true_UM <- final_res_conf_mat %>% dplyr::filter(Truth == "UM") %>% .$Freq %>% sum()
total_true_CM <- final_res_conf_mat %>% dplyr::filter(Truth == "CM") %>% .$Freq %>% sum()

final_res_conf_mat %>% 
  mutate(total_true = ifelse(Truth == "CM", Freq/total_true_CM, Freq/total_true_UM)) %>% 
  mutate(`Percent occurrence` = round(total_true*100, 2)) %>% 

  # plot
  ggplot(aes(x = Truth, y = Prediction, fill = `Percent occurrence`, label = `Percent occurrence`)) +
  geom_tile() +
  geom_text(size = 6) +
  scale_fill_gradient2(low = "#4575b4", high = "#d73027") +
  
  theme_bw() +
  theme(text = element_text(family = "Arial"),
        axis.title = element_text(size = 25),
        axis.text = element_text(size = 20),
        legend.title = element_text(size = 20),
        legend.text = element_text(size = 15))

```

#### C. ROC curve of final model compared to other iterations

```{r fig3c_prep, eval = FALSE}

## write function to create model on given df

f_log_reg_model <- function(df_model) {
  
  # resample
  hbox_boot <- bootstraps(df_model, strata = Status, times = 1000)
  
  # recipe
  hbox_rec <- recipe(Status ~ ., data = df_model) %>% 
    step_BoxCox(all_predictors()) %>% 
    step_nzv(all_predictors()) %>% 
    step_smote(Status) # upsample to deal with unbalanced sample sizes
  
  # create workflow
  hbox_wf <- workflow() %>% add_recipe(hbox_rec)
  
  # logistic regression model specifications
  glm_spec <- logistic_reg() %>% set_engine("glm")
  
  # create model on resamples
  glm_res <- hbox_wf %>% 
    add_model(glm_spec) %>%
    fit_resamples(
      resamples = hbox_boot,
      metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
      control = tune::control_resamples(save_pred = TRUE, verbose = TRUE)
    )
  
  # select best model to be used for evaluation
  glm_best <- glm_res %>% select_best("roc_auc")
  
  # finalize model
  hbox_final <- hbox_wf %>% 
    add_model(glm_spec) %>% 
    finalize_workflow(glm_best) %>%
    fit(df_model) %>%
    pull_workflow_fit()
  
  # view coefficients & p-values
  model_coefs <- hbox_final %>% tidy()
  
  # return list including model and a table of best model variable statistics
  list(glm_res, model_coefs)
    
}
    
## write function to collect predictions for ROC plot    
   
f_collect_preds <- function(glm_res) {
  
      glm_res %>% 
      collect_predictions() %>% 
      roc_curve(Status, .pred_CM)

}
    

## Perform functions on all iterations of model

set.seed(123)

    # select starting variables
    df_hbox_model1 <- df_hbox_log_reg %>% 
      dplyr::select(c(Status, Hematocrit, Lactate, avg_Hb_o2sat, Hb_o2sat_second_alpha, avg_Hb_conc, Hb_conc_second_alpha)) %>% 
      mutate(Status = factor(Status, levels = c("CM", "UM")))
    
    # model 1: Status ~ Hematocrit + Lactate + avg_Hb_o2sat + Hb_o2sat_second_alpha + avg_Hb_conc + Hb_conc_second_alpha
    model1 <- f_log_reg_model(df_hbox_model1)
    roc1 <- f_collect_preds(model1[[1]]) %>% mutate(model = "1")
    model1[[2]] # remove Hb_o2sat_second_alpha (highest p-val)
    
    # model 2: Status ~ Hematocrit + Lactate + avg_Hb_o2sat + avg_Hb_conc + Hb_conc_second_alpha
    df_hbox_model2 <-  df_hbox_model1 %>% dplyr::select(-Hb_o2sat_second_alpha)
    model2 <- f_log_reg_model(df_hbox_model2)
    roc2 <- f_collect_preds(model2[[1]]) %>% mutate(model = "2")
    model2[[2]] # remove Lactate
    
    # model 3: Status ~ Hematocrit + avg_Hb_o2sat + avg_Hb_conc + Hb_conc_second_alpha
    df_hbox_model3 <-  df_hbox_model2 %>% dplyr::select(-Lactate)
    model3 <- f_log_reg_model(df_hbox_model3)
    roc3 <- f_collect_preds(model3[[1]]) %>% mutate(model = "3")
    model3[[2]] # remove avg_Hb_o2sat

    # model 4: Status ~ Hematocrit + avg_Hb_conc + Hb_conc_second_alpha
    df_hbox_model4 <-  df_hbox_model3 %>% dplyr::select(-avg_Hb_o2sat)
    model4 <- f_log_reg_model(df_hbox_model4)
    roc4 <- f_collect_preds(model4[[1]]) %>% mutate(model = "4")
    model4[[2]] # none to remove, final model

save(roc1, roc2, roc3, file = "roc_curves.Rdata")

# calculate areas under curves

library(MESS)
roc1 <- roc1 %>% mutate(false_pos = 1 - specificity)
auc(roc1$false_pos, roc1$sensitivity)

roc2 <- roc2 %>% mutate(false_pos = 1 - specificity)
auc(roc2$false_pos, roc2$sensitivity)

roc3 <- roc3 %>% mutate(false_pos = 1 - specificity)
auc(roc3$false_pos, roc3$sensitivity)

roc_final <- glm_res %>% 
  collect_predictions() %>% 
  roc_curve(Status, .pred_0) %>% 
  mutate(false_pos = 1 - specificity)
auc(roc_final$false_pos, roc_final$sensitivity)


```

```{r fig3c_plot, fig.height = 8}

df_roc <- glm_res %>% 
  collect_predictions() %>% 
  roc_curve(Status, .pred_0) %>% 
  mutate(model = "final") %>% 
  bind_rows(roc1, roc2, roc3) 

ggplot() +
  geom_path(data = df_roc %>% filter(model == "final"), 
            aes(x = 1 - specificity, y = sensitivity, color = model), 
            size = 2.5) +
  geom_path(data = df_roc %>% filter(model != "final"), 
            aes(x = 1 - specificity, y = sensitivity, color = model), 
            size = 1) +
  geom_abline(lty = 3) +
  labs(x = "False positive rate", y = "True positive rate") +
  theme_bw() +
  
  theme(text = element_text(family = "Arial"),
        axis.title = element_text(size = 25),
        axis.text = element_text(size = 20),
        legend.title = element_text(size = 20),
        legend.text = element_text(size = 15),
        legend.position = "top")

```


**Figure 3.** Logistic regression model discriminating CM vs UM patients using cerebral tissue hemoglobin concentration, cerebral tissue hemoglobin concentration alpha, and hematocrit as the regressors **(A)** PCA of patients with CM and UM as determined by NIRS variables. Color indicates patient status, ellipses show median and 95% confidence interval on the cluster. **(B)** Confusion matrix representing the results of the logistic regression model averaged across 1000 bootstrap resamples. Percent occurrence indicates percentage times that the event (shown by the x- and y-axes) occurred.

### Table 2. Logistic regression model evaluation

#### A. Model metrics
```{r table2a}

table2a <- collect_metrics(glm_res) %>% 
  dplyr::select(.metric, mean, std_err) %>% 
  mutate(mean = mean*100, std_err = std_err*100) %>% 
  mutate(Metric = c("Accuracy", "ROC AUC", "Sensitivity", "Specificity")) %>% 
  dplyr::select(c("Metric", "mean", "std_err")) %>% 
  
  dplyr::rename("Mean" = "mean", "Standard error" = "std_err") %>% 
  mutate_if(is.numeric, round, 2) %>% 
  as.data.frame()
    
table2a %>% knitr::kable(align = rep('c', 4))

```

#### B. Coefficients
```{r table2b}

table2b <- hbox_final %>% 
  tidy(exponentiate = TRUE) %>% 
  mutate(estimate = ifelse(estimate < 10^-2, scientific(estimate, digits = 3), round(estimate, 2))) %>% 
  mutate(p.value = scientific(p.value, digits = 3),
         std.error = round(std.error, 2),
         statistic = round(statistic, 2)) %>% 
  dplyr::rename("Term" = "term", "Odds ratio" = "estimate", "Standard error" = "std.error", 
                "t-statistic" = "statistic", "p-value" = "p.value") %>% 
  as.data.frame() 

table2b %>% knitr::kable(align = rep('c', 5))

```

```{r export_table2, eval = FALSE}

list(table2a, table2b) %>% write.csv("Outputs/table_2.csv")

```


**Table 2.** Evaluation of the best logistic regression model select from 1000.


### Figure 4: Relationships among hemoglobin concentration and hematocrit
```{r fig4, fig.height = 8}

df_hbox %>% filter(Status != "HC") %>% 
  
  ggplot(aes(x = avg_Hb_conc, y = Hematocrit)) +
  geom_point(aes(color = Status), alpha = 0.7, size = 3) +
  
  xlab("Hemoglobin concentration (μM)") +
  ylab("Hematocrit (%)") +

  theme_bw() +
  theme(text = element_text(family = "Arial"),
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20))

```

**Figure 4.** 

### Figure 5: Cerebral malaria patient follow-up visits

```{r fig5_data}


# reorganize for plotting
df_results_all_plot <- df_brain_dfa_results %>% 
  dplyr::filter(!is.na(avg_Hb_conc_fu)) %>% 
  pivot_longer(cols = 3:ncol(.), names_to = "variable", values_to = "value") %>% 
  mutate(follow_up = ifelse(grepl("fu", variable), "follow-up", "initial visit")) %>% 
  mutate(follow_up = factor(follow_up, levels = c("initial visit", "follow-up")))

# descriptive statistics


  # averages

    # THC
    df_thc_compare <- df_results_all_plot %>% 
      dplyr::filter(variable %in% c("avg_Hb_conc", "avg_Hb_conc_fu"))
    
    thc_fu_pval <- wilcox.test(value ~ follow_up, data = df_thc_compare) %>% .$p.value
    
    # O2 sat
    df_o2sat_compare <- df_results_all_plot %>% 
      dplyr::filter(variable %in% c("avg_Hb_o2sat", "avg_Hb_o2sat_fu"))
    
    o2sat_fu_pval <- wilcox.test(value ~ follow_up, data = df_o2sat_compare) %>% .$p.value


  # alphas

    # THC
    df_thc_alpha_compare <- df_results_all_plot %>% 
      dplyr::filter(variable %in% c("Hb_conc_overall_alpha", "Hb_conc_fu_alpha"))
    
    thc_alpha_fu_pval <- wilcox.test(value ~ follow_up, data = df_thc_alpha_compare) %>% .$p.value
    
    # O2 sat
    df_o2sat_alpha_compare <- df_results_all_plot %>% 
      dplyr::filter(variable %in% c("Hb_o2sat_overall_alpha", "Hb_o2sat_fu_alpha"))
    
    o2sat_alpha_fu_pval <- wilcox.test(value ~ follow_up, data = df_o2sat_alpha_compare) %>% .$p.value

    
```

#### A: Cerebral hemoglobin oxygen saturation & hemolgobin concentration
```{r fig5a}

# create color vector
my_col_fu <- c("initial visit" = "#B80F0A", "follow up" = "black")

# plot
df_results_all_plot %>% 
  
  dplyr::filter(grepl("avg_Hb", variable)) %>% 
  mutate(measurement = ifelse(grepl("conc", variable), "THC", "O2_sat")) %>% 
  mutate(measurement = factor(measurement, levels = c("O2_sat", "THC"))) %>% 
  mutate(pvals = ifelse(measurement == "THC", round(thc_fu_pval, 3), round(o2sat_fu_pval, 3))) %>% 
  mutate(ycoord = ifelse(measurement == "THC", 400, 100)) %>% 
  
  ggplot(aes(x = follow_up, y = value)) +
  geom_violin(aes(color = follow_up)) +
  geom_point(shape = 1, size = 2) +
  stat_summary(fun = "median", geom = "crossbar", aes(x = follow_up, color = follow_up), size = 0.2, width = 0.5) +
  geom_path(aes(group = subject_id), lty = 2, alpha = 0.7) +
  
  # geom_segment(aes(x = 1, y = ycoord, xend = 2, yend = ycoord), size = 0.3) +
  # geom_segment(aes(x = 1, y = ycoord, xend = 1, yend = ycoord - .04*ycoord), size = 0.3) +
  # geom_segment(aes(x = 2, y = ycoord, xend = 2, yend = ycoord - .04*ycoord), size = 0.3) +
  # geom_text(x = 1.5, aes(y = ycoord + .025*ycoord, label = paste0("p = ", pvals))) +
  
  facet_wrap(~measurement, scales = "free_y") +
  labs(y = "", x = "") +
  scale_color_manual(values = my_col_fu) +

  theme_bw() +
  theme(legend.position = "none") +
  theme(text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 15),
        legend.position = "none")

```
No difference from initial visit to follow up - Hb_oxy pval: 0.90; Hb_tot pval: 0.32

#### B: Cerebral hemoglobin oxygen saturation & hemolgobin concentration alphas
```{r fig5b} 

library(scales)

df_results_all_plot %>% 
  
  dplyr::filter(!grepl("avg|second", variable)) %>% 
  mutate(measurement = ifelse(grepl("conc", variable), "THC", "O2_sat")) %>% 
  mutate(measurement = factor(measurement, levels = c("O2_sat", "THC"))) %>% 
  mutate(pvals = ifelse(measurement == "THC", round(thc_alpha_fu_pval, 3), round(o2sat_alpha_fu_pval, 3))) %>% 
  
  ggplot(aes(x = follow_up, y = value)) +
  geom_violin(aes(color = follow_up)) +
  geom_point(shape = 1, size = 2) +
  stat_summary(fun = "median", geom = "crossbar", aes(x = follow_up, color = follow_up), size = 0.2, width = 0.5) +
  geom_path(aes(group = subject_id), lty = 2, alpha = 0.7) +
  
  geom_hline(yintercept = 0.45, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 0.55, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 1.0, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 1.5, alpha = 0.4, lty = 2) +

  geom_segment(aes(x = 1, y = 0.93, xend = 2, yend = 0.93), size = 0.3) +
  geom_segment(aes(x = 1, y = 0.93, xend = 1, yend = 0.89), size = 0.3) +
  geom_segment(aes(x = 2, y = 0.93, xend = 2, yend = 0.89), size = 0.3) +
  geom_text(x = 1.5, y = 0.96, aes(label = paste0("p = ", pvals))) +
  
  facet_wrap(~measurement) +
  
  ylim(0.45, 1.1) +
  labs(y = "alpha", x = "") +
  scale_color_manual(values = my_col_fu) +

  theme_bw() +
  theme(legend.position = "none") +
  theme(text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 15),
        legend.position = "none")

```

### Figure 6: Muscle hemoglobin concentration and oxygen saturation data from initial visit and follow-up

#### A. Muscle hemoglobin concentration and oxygen saturation

```{r fig6A}

# select from the long df
df_muscle_long <- df_muscle_dfa_results %>% 
  dplyr::select(c(Status, avg_Hb_conc_muscle, avg_Hb_o2sat_muscle)) %>% 
  pivot_longer(2:3, names_to = "variable") %>% 
  mutate(Status = factor(Status, levels = c("HC", "UM", "CM")),
         variable = factor(variable, levels = c("avg_Hb_o2sat_muscle", "avg_Hb_conc_muscle")))
  
# create color vector
my_col <- c("CM" = "#00BFC4", "HC" = "#7CAE00", "UM" = "#F8766D")

# plot
df_muscle_long %>% dplyr::filter(Status != "?") %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1, size = 2) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +
  facet_wrap(~variable, scales = "free_y") +
  
  scale_color_manual(values = my_col) +
  theme_bw() +
  theme(legend.position = "none")  +
  
  theme(text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

```{r fig6a.pvals, eval = FALSE}

# hemoglobin concentration
kruskal.test(avg_Hb_conc_muscle ~ Status, data = df_muscle_dfa_results)

# hemoglobin oxygen saturation
kruskal.test(avg_Hb_o2sat_muscle ~ Status, data = df_muscle_dfa_results)

```
No significant differences: Hb_oxy pval = 0.11 by KW; Hb_tot pval = 0.051 by KW

#### B. Muscle DFA results
```{r fig6B}

# select from the long df
df_muscle_dfa_long <- df_muscle_dfa_results %>% 
  dplyr::select(c(Status, muscle_Hb_conc_alpha, muscle_Hb_o2sat_alpha)) %>% 
  pivot_longer(2:3, names_to = "variable") %>% 
  mutate(Status = factor(Status, levels = c("HC", "UM", "CM")),
         variable = factor(variable, levels = c("muscle_Hb_o2sat_alpha", "muscle_Hb_conc_alpha")))
  
# create color vector
my_col <- c("CM" = "#00BFC4", "HC" = "#7CAE00", "UM" = "#F8766D")

# plot
df_muscle_dfa_long %>% dplyr::filter(Status != "?") %>% 
  
  ggplot(aes(x = Status, y = value)) +
  geom_violin(aes(color = Status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1, size = 2) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = Status), size = 0.2, width = 0.5) +
  
  geom_hline(yintercept = 0.45, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 0.55, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 1.0, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 1.5, alpha = 0.4, lty = 2) +
  
  facet_wrap(~variable) +
  ylim(0, 1.5) +
  
  scale_color_manual(values = my_col) +
  theme_bw() +
  theme(legend.position = "none")  +
  
  theme(text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```

```{r fig6b.pvals, eval = FALSE}

# hemoglobin concentration alpha
kruskal.test(muscle_Hb_conc_alpha ~ Status, data = df_muscle_dfa_results)

# hemoglobin oxygen saturation alpha
kruskal.test(muscle_Hb_o2sat_alpha ~ Status, data = df_muscle_dfa_results)

```
No significant differences: Hb_oxy pval = 0.08 by KW; Hb_tot pval = 0.15 by KW

#### C. CM follow-up muscle results
```{r fig6c_data}

# reorganize for plotting
df_muscle_fu_plot <- df_muscle_dfa_results %>% 
  dplyr::filter(!is.na(avg_Hb_o2sat_muscle_fu)) %>% 
  pivot_longer(cols = 3:ncol(.), names_to = "variable", values_to = "value") %>% 
  mutate(follow_up = ifelse(grepl("fu", variable), "follow-up", "initial visit")) %>% 
  mutate(follow_up = factor(follow_up, levels = c("initial visit", "follow-up")))


# descriptive statistics

  # averages

    # THC
    df_thc_muscle_compare <- df_muscle_fu_plot %>% 
      dplyr::filter(variable %in% c("avg_Hb_conc_muscle", "avg_Hb_conc_muscle_fu"))
    
    thc_muscle_fu_pval <- wilcox.test(value ~ follow_up, data = df_thc_muscle_compare) %>% .$p.value
    
    # O2 sat
    df_o2sat_muscle_compare <- df_muscle_fu_plot %>% 
      dplyr::filter(variable %in% c("avg_Hb_o2sat_muscle", "avg_Hb_o2sat_muscle_fu"))
    
    o2sat_muscle_fu_pval <- wilcox.test(value ~ follow_up, data = df_o2sat_muscle_compare) %>% .$p.value


  # alphas

    # THC
    df_thc_muscle_alpha_compare <- df_muscle_fu_plot %>% 
      dplyr::filter(variable %in% c("muscle_Hb_conc_alpha", "muscle_Hb_conc_fu_alpha"))
    
    thc_alpha_muscle_fu_pval <- wilcox.test(value ~ follow_up, data = df_thc_muscle_alpha_compare) %>% .$p.value
    
    # O2 sat
    df_o2sat_muscle_alpha_compare <- df_muscle_fu_plot %>% 
      dplyr::filter(variable %in% c("muscle_Hb_o2sat_alpha", "muscle_Hb_o2sat_fu_alpha"))
    
    o2sat_alpha_muscle_fu_pval <- wilcox.test(value ~ follow_up, data = df_o2sat_muscle_alpha_compare) %>% .$p.value

```

Muscle hemoglobin concentration and hemoglobin oxygen saturation
```{r fig6c_avgs}

df_muscle_fu_plot %>% 
  
  dplyr::filter(grepl("avg_Hb", variable)) %>% 
  mutate(measurement = ifelse(grepl("conc", variable), "THC", "O2_sat")) %>% 
  mutate(measurement = factor(measurement, levels = c("O2_sat", "THC"))) %>% 
  mutate(pvals = ifelse(measurement == "THC", round(thc_muscle_fu_pval, 3), round(o2sat_muscle_fu_pval, 3))) %>% 
  mutate(ycoord = ifelse(measurement == "THC", 400, 100)) %>% 
  
  ggplot(aes(x = follow_up, y = value)) +
  geom_violin(aes(color = follow_up)) +
  geom_point(shape = 1, size = 2) +
  stat_summary(fun = "median", geom = "crossbar", aes(x = follow_up, color = follow_up), size = 0.2, width = 0.5) +
  geom_path(aes(group = subject_id), lty = 2, alpha = 0.7) +
  
  # geom_segment(aes(x = 1, y = ycoord, xend = 2, yend = ycoord), size = 0.3) +
  # geom_segment(aes(x = 1, y = ycoord, xend = 1, yend = ycoord - .04*ycoord), size = 0.3) +
  # geom_segment(aes(x = 2, y = ycoord, xend = 2, yend = ycoord - .04*ycoord), size = 0.3) +
  # geom_text(x = 1.5, aes(y = ycoord + .025*ycoord, label = paste0("p = ", pvals))) +
  
  facet_wrap(~measurement, scales = "free_y") +
  labs(y = "", x = "") +
  scale_color_manual(values = my_col_fu) +

  theme_bw() +
  theme(legend.position = "none") +
  theme(text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 15),
        legend.position = "none")

```
No difference in hemoglobin concentration from initial visit to follow up - Hb_tot pval: 0.72. However, there was a significant drop in muscle hemoglobin oxygen saturation at the follow-up compared to initial visit (p = 0.044).

Muscle DFA results
```{r fig6c_dfa}

df_muscle_fu_plot %>% 
  
  dplyr::filter(!grepl("avg_Hb", variable) & !grepl("second", variable)) %>% 
  mutate(measurement = ifelse(grepl("conc", variable), "THC", "O2_sat")) %>% 
  mutate(measurement = factor(measurement, levels = c("O2_sat", "THC"))) %>% 
  mutate(pvals = ifelse(measurement == "THC", round(thc_muscle_fu_pval, 3), round(o2sat_muscle_fu_pval, 3))) %>% 
  mutate(ycoord = ifelse(measurement == "THC", 400, 100)) %>% 
  
  ggplot(aes(x = follow_up, y = value)) +
  geom_violin(aes(color = follow_up)) +
  geom_point(shape = 1, size = 2) +
  stat_summary(fun = "median", geom = "crossbar", aes(x = follow_up, color = follow_up), size = 0.2, width = 0.5) +
  geom_path(aes(group = subject_id), lty = 2, alpha = 0.7) +
  
  geom_hline(yintercept = 0.45, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 0.55, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 1.0, alpha = 0.4, lty = 2) +
  geom_hline(yintercept = 1.5, alpha = 0.4, lty = 2) +

  # geom_segment(aes(x = 1, y = ycoord, xend = 2, yend = ycoord), size = 0.3) +
  # geom_segment(aes(x = 1, y = ycoord, xend = 1, yend = ycoord - .04*ycoord), size = 0.3) +
  # geom_segment(aes(x = 2, y = ycoord, xend = 2, yend = ycoord - .04*ycoord), size = 0.3) +
  # geom_text(x = 1.5, aes(y = ycoord + .025*ycoord, label = paste0("p = ", pvals))) +
  
  facet_wrap(~measurement) +
  ylim(0.45, 1.05) +
  labs(y = "", x = "") +
  scale_color_manual(values = my_col_fu) +

  theme_bw() +
  theme(legend.position = "none") +
  theme(text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 15),
        legend.position = "none")

```
No difference from initial visit to follow up - Hb_oxy pval: 0.17; Hb_tot pval: 0.10.

### Figure 7: Hemoglobin concentration trends in extravascular edema vs vascular engorgement

```{r fig7, fig.width = 12, eval = FALSE }

df_fig6 <- tibble(edema = rnorm(n = 90, mean = 20, sd = 20),
                  normal = rnorm(n = 90, mean = 70, sd = 20),
                  engorge = rnorm(n = 90, mean = 130, sd = 20)) %>% 
  pivot_longer(1:3, names_to = "variable", values_to = "value") %>% 
  mutate(variable = factor(variable, levels = c("edema", "normal", "engorge")))

df_fig6 %>% 
  ggplot(aes(x = variable, y = value)) +
  geom_point(shape = 1, size = 3, position = position_jitter(0.1)) +
  stat_summary(fun = "median", geom = "crossbar", size = 0.2, width = 0.5) +
  labs(x = "", y = "") +
  theme_classic() +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank())



```

### Supplementary figure 1: Spearman correlations of potential regressors
```{r fig_s1, fig.show = "hold", out.width = "50%", fig.height = 12, fig.width = 15}

# calculate spearman correlations in each group

f_calc_cor <- function(df, status) {
  
  df %>%
    dplyr::filter(Status == status) %>% 
    dplyr::select(-c(subject_id, Status)) %>% 
    cor(method = "spearman")
  
}

# create run correlations of potential regressors
m_cor_hv <- df_hbox_impute %>% f_calc_cor(status = "HC")

m_cor_um <- df_hbox_impute %>% f_calc_cor(status = "UM")

m_cor_cm <- df_hbox_impute %>% f_calc_cor(status = "CM")

# correlation matrix

# write plot function
f_plot_cor <- function(mat) {
  
  melt(mat) %>% 
    
    mutate_if(is.factor, factor, levels = c("Lactate", "Hematocrit", "avg_Hb_o2sat", "avg_Hb_conc", "Hb_o2sat_second_alpha", "Hb_conc_second_alpha")) %>% 
    
    ggplot(aes(x = X1, y = X2, fill = value, label = round(value, 2))) + 
    geom_tile() +
    scale_fill_gradient2(low = "#4575b4", high = "#d73027") +
    geom_text(size = 9) +
    
    xlab("") +
    ylab("") +
    theme_bw() +
    theme(text = element_text(family = "Arial"),
          axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text = element_text(size = 35),
          legend.text = element_text(size = 40),
          legend.title = element_text(size = 50),
          legend.key.size = unit(2, 'cm'),
          plot.title = element_text(size = 30)
    )
  
}

  # extract legend
  p_leg <- m_cor_hv %>% f_plot_cor() %>% get_legend()
  
  # create plots
  m_cor_hv %>% f_plot_cor() + ggtitle("Healthy chilren") + theme(legend.position = "none")
  
  m_cor_um %>% f_plot_cor() + ggtitle("Uncomplicated malaria") + theme(legend.position = "none")
  
  m_cor_cm %>% f_plot_cor() + ggtitle("Cerebral malaria") + theme(legend.position = "none")
  
  as_ggplot(p_leg)

```

**Figure S1.** Shown are the spearman correlations among hematocrit, lactate, hemoglobin concentration and its alpha, and hemoglobin oxygen saturation and its alpha. Red indicates a positive correlation; blue indicates a negative correlation.

### Supplementary figure 2: Logistic regression curves of potential regressors
```{r fig_s2, fig.height = 12}

# pivot longer and change factor levels
df_hbox_reg_long <- df_hbox_impute %>% 
  mutate(Status = as.numeric(ifelse(Status == "CM", 1, 0))) %>% 
  pivot_longer(3:ncol(.), names_to = "variable", values_to = "value") %>% 
  mutate(variable = factor(variable, levels = c("Lactate", "Hematocrit", "avg_Hb_o2sat", "avg_Hb_conc", "Hb_o2sat_second_alpha", "Hb_conc_second_alpha")))

# plot as individual regression curves
df_hbox_reg_long %>% 
  
  ggplot(aes(x = value, y = Status)) +
  geom_point() +
  geom_smooth(method = "glm", method.args = list(family = "binomial")) +
  facet_wrap(~variable, scales = "free_x", nrow = 5, ncol = 2) +
  
  ylab("Status: 1 = CM, 0 = UM") +
  xlab("") +
  
  theme_bw() +
  theme(text = element_text(family = "Arial"),
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20),
        strip.text = element_text(size = 20))

```

**Figure S2.** Plotted are the individual logistic regression curves of individual variables tested for their discriminatory power between UM and CM. On the y-axis, 1 represents CM while 0 represents UM. A steeper curve indicates higher discriminatory power.



### Supplementary figure 3: Detrended fluctuation analysis 

#### A. Pre-processed signal
```{r fig_s3a}

df_thc_cumsum1 %>% 
  
  ggplot(aes(x = Time, y = sg_filt)) +
  geom_line() +
  
  labs(y = "Hemoglobin concentration (μM)", x = "Time (s)") +
  ggtitle("Pre-processed signal") +
  theme_bw() +
  theme(text = element_text(family = "Arial"),
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20))


```

#### B. Cumulative sum
```{r fig_s3b}

df_thc_cumsum1 %>% 
  
  ggplot(aes(x = Time, y = cumsum)) +
  geom_line() +
  geom_vline(xintercept = 1201, lty = 2, color = "black") +
  geom_vline(xintercept = 1210, lty = 2, color = "black") +
  
  labs(y = "Cumulative sum", x = "Time (s)") +
  ggtitle("Cumulative signal") +
  theme_bw() +
  theme(text = element_text(family = "Arial"),
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20))

```

#### C. Window detrend
```{r fig_s3c, fig.show = "hold", out.width = "50%"}

fit <- lm(cumsum ~ Time, data = df_thc_cumsum1 %>% dplyr::filter(Time > 1201 & Time < 1210))

# Window 
df_thc_cumsum1 %>% dplyr::filter(Time > 1201 & Time < 1210) %>% 
  
  ggplot(aes(x = Time, y = cumsum)) +
  geom_line(size = 2) +
  geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2], color = "black", lty = 2, size = 2) +
  
  labs(y = "Cumulative sum", x = "Time (s)") +
  ggtitle("Cumulative signal") +
  theme_bw() +
  theme(text = element_text(family = "Arial"),
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20),
        title = element_text(size = 25))


# Detrended
df_thc_cumsum1 %>% dplyr::filter(Time > 1201 & Time < 1210) %>% 
  
  ggplot(aes(x = Time, y = fit$residuals)) +
  geom_line(size = 2) +
  geom_hline(yintercept = 0, color = "black", lty = 2, size = 2) +
  
  labs(y = "", x = "Time (s)") +
  ggtitle("Detrended cumulative signal") +
  theme_bw() +
  theme(text = element_text(family = "Arial"),
        axis.text = element_text(size = 25),
        axis.title = element_text(size = 28),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20),
        title = element_text(size = 25))



```

#### D. Results plot
```{r fig_s3d}

# find slope of segmented line
library(segmented)
df1 <- l_dfa_thc[[1]][[1]] %>% mutate(log10_avg_fluct = log10(avg_fluctuation), log10_window_size = log10(window_size))
fit1 <- lm(log10_avg_fluct ~ log10_window_size, data = df1) 
segfit1 <- segmented(fit1)
slopes1 <- coef(segfit1) # the coefficients are the differences in slope in comparison to the previous slope

# first line: 
#y = b0 + b1*x; y = intercept1 + slope1 * x

# second line:
# y = c0 + c1*x; y = intercept2 + slope2 * x

# At the breakpoint (break1), the segments b and c intersect: b0 + b1*x = c0 + c1*x
b0 <- slopes1[[1]]
b1 <- slopes1[[2]]

c1 <- slopes1[[2]] + slopes1[[3]]
break1 <- segfit1$psi[[2]]

# Solve for c0 (intercept of second segment):
c0 <- b0 + b1 * break1 - c1 * break1

df1 %>% 
  
  ggplot(aes(x = log10_window_size, y = log10_avg_fluct)) +
  geom_point(size = 3, shape = 1) +
  geom_abline(intercept = c0, slope = c1, color = "black", lty = 2) +
  annotate("text", x = -0.5, y = 2, label = paste0("alpha = ", round(c1, 2)), size = 8) +
  
  labs(x = "log10(window length)", y = "log10(average fluctuation)") +
  ggtitle("DFA results") +
  
  theme_bw() +
  theme(text = element_text(family = "Arial"),
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20))

```












